PROJECT IS UNDER CONSTRUCTION
Project uses R visualization and Python scripting together
Libraries and reading in clean data csv file
Note: DAYS = baseline audit, Monday pre-party, SG day 1, and SG day 2
# Setup: knitr chunk defaults, package loading, and the Python helper script.
# Chunk defaults: hide warnings and messages in the knitted output.
knitr::opts_chunk$set(warning=FALSE, message=FALSE)
####### ALL LIBRARIES USED ###########
###### DATA MANIPULATION ######
# Keep strings as character vectors rather than factors when data frames are
# built (already the default since R 4.0).
options(stringsAsFactors = FALSE)
library(dplyr) # for main data manipulations (pipes, group_by, summarise, mutate)
library(reticulate) # for python script (provides source_python)
library(reshape2) # for dcast
library(lubridate) # for hours
# Sources the Python file into the R session. get_euclidean(), called later in
# this file, is presumably defined there -- confirm against the script.
source_python("euclidean_script.py") # python script
###### GRAPHING ######
library(hrbrthemes) # for theme_ipsum
library(RColorBrewer) # for colors (brewer.pal)
library(ggplot2) # for lollipop graph
library(sunburstR) # for sunburst graph
library(streamgraph) # for streamgraph
library(heatmaply) # for heatmap
library(DT) # for data table
library(leaflet) # for leaflet map
library(htmltools) # for leaflet map (HTML labels)
######################################
# Category vocabularies shared by the rest of the analysis:
# `collection` lists the collection-object types, `litter` the litter types.
collection <- c("trashCan", "recyclingCan", "tobaccoAshCan")
litter <- c("paper", "tobacco", "unknown", "plastic", "food", "glass")
# Marker icon files; the map code indexes this vector in parallel with `litter`.
# NOTE(review): "plastic-bottle.png" sits at position 3, which lines up with
# "unknown" rather than "plastic" -- presumably placeholders; confirm.
icons <- c(
  "cigarette.png", "cigarette.png", "plastic-bottle.png",
  "cigarette.png", "cigarette.png", "cigarette.png"
)
# Load the cleaned data and expand the abbreviated day names into an ordered
# factor. Any non-missing value other than Sun/Mon/Tue becomes "Wednesday".
raw <- read.csv("clean/clean_rubbish.csv") %>%
  mutate(
    day = factor(
      case_when(
        is.na(day) ~ NA_character_,
        day == "Sun" ~ "Sunday",
        day == "Mon" ~ "Monday",
        day == "Tue" ~ "Tuesday",
        TRUE ~ "Wednesday"
      ),
      levels = c("Sunday", "Monday", "Tuesday", "Wednesday")
    )
  )
# prepping data for plot
# Lollipop input: number of litter items per (rubbishType, day) for
# Redwood City. `order` is a global row index after sorting by day and then by
# ascending count; the plot uses it as the axis position so every facet can
# show its own ordering of litter types.
plot_data <- raw %>%
  subset(city == "Redwood City" & is_litter == 1) %>%
  group_by(rubbishType, day) %>%
  summarise(num_litter = n()) %>%
  ungroup() %>% # row_number() below must run over the whole table
  arrange(day, num_litter) %>%
  mutate(order = row_number())
# plotting lollipop graphs: one facet per day, one lollipop per litter type
plot_data %>%
  ggplot() +
  geom_segment(
    aes(x = order, xend = order, y = 0, yend = num_litter),
    color = "pink", size = 1.5
  ) +
  # The colour is a fixed value, so it is set outside aes(). Inside aes() the
  # string "pink" is treated as data and drawn in the default palette colour.
  geom_point(aes(x = order, y = num_litter), color = "pink", size = 5) +
  coord_flip() +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    panel.spacing = unit(0.1, "lines"),
    strip.text.x = element_text(size = 15)
  ) +
  xlab("") +
  ylab("Pieces of Litter Collected") +
  ggtitle("Total Litter Collected by Rubbish Team") +
  # argument spelled out in full (`scales`) instead of relying on partial matching
  facet_wrap(~day, ncol = 1, scales = "free_y") +
  # `order` is numeric; relabel each position with its litter type
  scale_x_continuous(
    breaks = plot_data$order,
    labels = plot_data$rubbishType
  )
# CHANGING IMAGES
#library("ggimage")
# image links must be in dataframe column
#geom_image(aes(image=image), size=.05)
GRAPH ABOVE:
NEED:
# clustering data points with python script
# get_euclidean() comes from the sourced Python file; the code below reads the
# columns closest_cent, cent_type and distance from its result (presumably
# added by that function -- confirm against the script).
clustered_data <- raw %>%
  subset(city == "Redwood City") %>% # this is a different area
  get_euclidean(collection, litter) # python function
# getting details on clustered data: one row per (collection object, day) with
# litter count, distance statistics and mean coordinates
clusters <- clustered_data %>%
  subset(is_litter == 1) %>% # only want objects
  group_by(closest_cent, day) %>%
  summarise(
    cent_type = max(cent_type),
    num_litter = n(),
    mean_dist = round(mean(distance), 2),
    median_dist = round(median(distance), 2),
    sd_dist = round(sd(distance), 2),
    max_dist = round(max(distance), 2),
    lat = mean(lat),
    long = mean(long)
  ) %>%
  ungroup() %>% # summarise() only drops the last grouping level
  arrange(desc(num_litter))
# clustering performed
# getting data for sunburst plot: litter counts per
# (collection type, day, litter type), encoded as "-"-separated paths
clustered_data %>%
  subset(is_litter == 1) %>%
  group_by(cent_type, day, rubbishType) %>%
  summarise(num_litter = n()) %>%
  ungroup() %>% # pass a plain two-column table on to sunburst()
  mutate(path = paste(cent_type, day, rubbishType, sep = "-")) %>%
  subset(select = c(path, num_litter)) %>%
  # plotting
  sunburst(
    legend = TRUE,
    colors = rev(brewer.pal(9, "RdPu")),
    count = TRUE
  )
# This shows the breakdown of clusterings, from collection object type, to day
# of the week, then finally to the litter type.
# Extract the hour of day (0-23) from timestamps.
#   time: character vector formatted "%Y-%m-%d %H:%M:%S", or a date-time
#         (POSIXct / POSIXlt) vector.
# Returns an integer vector the same length as `time`; NA where a value
# cannot be parsed.
get_hour <- function(time) {
  if (inherits(time, "POSIXt")) {
    # already a date-time: read the hour in the zone the value carries
    return(as.POSIXlt(time)$hour)
  }
  # Parse text as UTC so that every wall-clock time is valid. Parsing in the
  # session's local zone can give NA for times skipped by a DST change.
  as.POSIXlt(time, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")$hour
}
# getting plot data for streamgraph: litter items per collection object and
# per hour, with hours counted continuously from Sunday 00:00
plot_data <- clustered_data %>%
  subset(is_litter == 1) %>% # only want objects
  mutate(
    # hour of day plus a 24-hour offset per day; the hour is computed once
    # rather than in every branch
    time = get_hour(time) + case_when(
      is.na(day) ~ NA_real_,
      day == "Sunday" ~ 0,
      day == "Monday" ~ 24,
      day == "Tuesday" ~ 48,
      TRUE ~ 72
    ),
    closest_cent = paste("Collection ID:", closest_cent)
  ) %>%
  group_by(closest_cent, time) %>%
  summarise(num_litter = n()) %>%
  ungroup() %>%
  arrange(desc(num_litter))
# plotting Streamgraph
plot_data %>%
  streamgraph(
    "closest_cent", "num_litter", "time",
    interpolate = "cardinal", scale = "continuous", width = "800px"
  ) %>%
  sg_legend(show = TRUE, label = "Collection ID: ") %>%
  sg_fill_brewer("RdPu")
# This graph represents the amount of litter collected per hour, for each
# collection object.
GRAPH ABOVE:
# prepping data for heatmap plot: one row per collection object, one column
# per day, cells holding the mean litter distance
plot_data <- clusters %>%
  subset(select = c(day, closest_cent, mean_dist)) %>%
  dcast(closest_cent ~ day, value.var = "mean_dist")
# changing NA values to 0 in every day column that is present, so a day with
# no data does not break the script
day_cols <- setdiff(names(plot_data), "closest_cent")
plot_data[day_cols][is.na(plot_data[day_cols])] <- 0
# changing rownames to centroid id
rownames(plot_data) <- plot_data[, "closest_cent"]
# making dataframe into matrix
plot_data <- plot_data %>%
  subset(select = -c(closest_cent)) %>%
  as.matrix() %>%
  t() # flip matrix: days become rows, collection objects become columns
# plotting heatmap
plot_data %>%
  heatmaply(
    plot_method = "plotly",
    colors = colorRampPalette(rev(brewer.pal(9, "RdPu"))),
    # spelled `dendrogram` so the option takes effect: only the columns are
    # clustered and the day rows keep their order
    dendrogram = "column",
    show_dendrogram = c(FALSE, FALSE),
    label_names = c("Day", "Collection ID", "Mean Distance"),
    grid_color = "white",
    main = "Mean Distance of Litter to Closest Collection Object",
    xlab = "Collection Objects",
    key.title = "meters",
    showticklabels = c(FALSE, TRUE),
    colorbar_len = .8,
    grid_gap = 1
  ) %>%
  layout(width = 800)
# GRAPH ABOVE:
# getting data for data table: distance statistics per collection object
# over all days
# NOTE(review): this filters on obj_id != -1 while the other summaries in this
# file filter on is_litter == 1 -- presumably equivalent; confirm.
clustered_data %>%
  subset(obj_id != -1) %>% # only want objects
  group_by(closest_cent) %>%
  summarise(
    cent_type = max(cent_type),
    num_litter = n(),
    mean_dist = round(mean(distance), 2),
    median_dist = round(median(distance), 2),
    sd_dist = round(sd(distance), 2),
    max_dist = round(max(distance), 2)
  ) %>%
  arrange(desc(num_litter)) %>%
  rename( # renaming columns
    "Collection ID" = closest_cent,
    "Collection Type" = cent_type,
    "Number of Litter" = num_litter,
    "Mean Distance (m)" = mean_dist,
    "Median Distance (m)" = median_dist,
    "Standard Deviation (m)" = sd_dist,
    "Max Distance (m)" = max_dist
  ) %>%
  # plotting data table
  datatable()
# function for color of collection types: maps the numeric codes 1, 2, 3
# onto the three palette colours
colors <- colorNumeric(
  palette = c("#134a47", "#2859b8", "green"),
  domain = c(1, 2, 3)
)
# Mouse-over text for the map circles: one HTML string per row of `clusters`,
# in row order. seq_len() keeps the result empty when `clusters` has no rows.
labs <- lapply(seq_len(nrow(clusters)), function(i) {
  paste0(
    "Collection Type: ", clusters[i, "cent_type"],
    "<p>Number of Litter Objects: ", clusters[i, "num_litter"],
    "</p>Mean Distance: ", round(clusters[i, "mean_dist"], 2),
    " meters<p>Max Distance: ", round(clusters[i, "max_dist"], 2),
    " meters</p>"
  )
})
# HTML snippet for the map legend; it is handed to addControl() when the map
# is built.
# NOTE(review): every entry except Tobacco uses open-trash-can.png --
# presumably placeholder icons; confirm.
html_legend <- "<img src='open-trash-can.png' style='width:20px;height:20px;'> Trash & Recycling<br/>
<img src='open-trash-can.png' style='width:20px;height:20px;'> Paper<br/>
<img src='cigarette.png' style='width:20px;height:20px;'> Tobacco<br/>
<img src='open-trash-can.png' style='width:20px;height:20px;'> Unknown<br/>
<img src='open-trash-can.png' style='width:20px;height:20px;'> Plastic<br/>
<img src='open-trash-can.png' style='width:20px;height:20px;'> Food<br/>
<img src='open-trash-can.png' style='width:20px;height:20px;'> Glass"
# Base leaflet map of the collection objects: a circle per row of `clusters`
# (radius = max litter distance, colour = collection type, opacity scaled by
# litter count) plus a bin marker, a layer toggle and the HTML legend.
map <- clusters %>%
  mutate(
    # numeric code for the palette function: 1 = trashCan, 2 = recyclingCan,
    # 3 = anything else
    cent_type = case_when(
      is.na(cent_type) ~ NA_real_,
      cent_type == "trashCan" ~ 1,
      cent_type == "recyclingCan" ~ 2,
      TRUE ~ 3
    )
  ) %>%
  leaflet(width = "100%") %>%
  setView(lng = -122.2298, lat = 37.48650, zoom = 18) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addCircles(
    lng = ~long,
    lat = ~lat,
    radius = ~max_dist,
    color = ~ colors(cent_type),
    # NOTE(review): 357 is presumably the largest litter count at the time of
    # writing -- confirm before reusing with new data
    opacity = ~ num_litter / 357,
    label = lapply(labs, HTML),
    group = "Collections"
  ) %>%
  addMarkers(
    lng = ~long,
    lat = ~lat,
    icon = makeIcon(
      iconUrl = "open-trash-can.png",
      iconRetinaUrl = "open-trash-can.png",
      iconWidth = 15,
      iconHeight = 15
    ),
    group = "Collections"
  ) %>%
  addLayersControl(
    overlayGroups = c("Collections", litter),
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  addControl(html = html_legend, position = "bottomleft")
# Add one marker layer per litter type, using the icon at the same position
# in `icons`; each layer is grouped under the litter type so it can be toggled
# from the layer control.
for (i in seq_along(litter)) {
  map <- map %>%
    addMarkers(
      data = clustered_data %>%
        subset(rubbishType == litter[[i]]),
      lng = ~long, lat = ~lat,
      icon = makeIcon(icons[[i]], icons[[i]], 7, 7),
      group = litter[[i]]
    )
}
# render the finished map
map
The map looks disgusting.
Try hexbin instead?
A work by Alexander Kahanek x Rubbish, co.